### PACKAGES
library(MatchIt)
library(haven)
library(dplyr)
library(cobalt)
library(plm)
library(sandwich)
library(lmtest)
library(estimatr)
library(stargazer)
library(ggplot2)
#### DETAILS ####
##GOAL: Carry out the matching analysis on the df that also has the village data 

## Set working directory to "JOP Replication files" folder on your computer

#1. MATCHING USING nearest neighbor  
## Load the data that you saved from  stata - matching_df.do (in the Updates analysis)

## set seed for consistency with resutls in paper 

match_df <- read_dta("DATA FILES TO SHARE/TEMP_FILES/match_ew_vill.dta")

set.seed(4256)

MISSING <- is.na(match_df$dil_dummy) | is.na(match_df$EW_edu)|is.na(match_df$EW_Age)|
  is.na(match_df$mig_pop)| is.na(match_df$INCOME)| is.na(match_df$rel_muslim)|is.na(match_df$sc)| is.na(match_df$obc)|
  is.na(match_df$st)| is.na(match_df$dist_pucca)| is.na(match_df$dil_dummy)|
  is.na(match_df$vill_bus)|is.na(match_df$dist_town)| is.na(match_df$vill_elec_hrs)| is.na(match_df$EW_child)


match_df<- subset(match_df, subset = !MISSING)

m.out <- matchit(w2_abshusband_dummy~ mig_pop+EW_edu+EW_Age+EW_health+EW_child+sc+st+obc+rel_muslim+
                   dil_dummy+INCOME+dist_pucca+vill_elec_hrs+dist_town+vill_bus, data = match_df,method = "nearest",ratio = 5)

summary(m.out)

new.names <- c(
  dil_dummy = "Daughter-in-law",
  EW_edu = "Years of education",
  EW_Age = "Age (yrs.)",
  mig_pop = "Migrants in the village",
  sc = "Scheduled Caste",
  st = "Scheduled Tribe",
  vill_elec_hrs = "Electricity in Village (hrs)",
  EW_child = "No. of children",
  vill_bus = "Connectivity to village by bus",
  rel_muslim = "Muslim",
  dist_town = "Distance to nearest town",
  log_inc = "Log household income",
  EW_health = "Health score",
  obc = "Other Backward Caste",
  dist_pucca = "Distance to road"
)


bal.tab(m.out, thresholds = c(m = .05), un = TRUE)
bal.plot(m.out, var.name = "distance",
         mirror = TRUE, type = "histogram")
p<- love.plot(m.out, stats = c("mean.diffs", "variance.ratios"),
          thresholds = c(m = .1, v = 2), abs = TRUE, 
          var.names = new.names,
          colors = c("red","blue"),
          shapes = c("triangle filled", "circle filled"),
          binary = "std",
          var.order = "unadjusted")
p
#save this data in a df 
ggsave("OUTPUT/GRAPHS/Balance_plot.png", plot =p)
match_ew1 <- match.data(m.out)
write_dta(match_ew1, "DATA FILES TO SHARE/TEMP_FILES/match_w1.dta")
